library(newsflash)
library(ggalt) # github version
library(hrbrmisc) # github only
library(tidyverse)
library(ggplot2)
library(scales)
library(lubridate)
library(stringr)


# Load the list of attacks and derive, for each incident, two search keywords
# and a query window.
wh_list <- read.csv("72attacks.csv", stringsAsFactors = FALSE)

# Primary keyword: the part of Location before the first comma or opening
# parenthesis. Whitespace left behind by the stripping is trimmed so it does
# not end up in the search term.
wh_list$keyword <- gsub(",.*", "", wh_list$Location)
wh_list$keyword <- gsub("\\(.*", "", wh_list$keyword)
wh_list$keyword <- str_to_title(trimws(wh_list$keyword))

# Fallback keyword: the part of Location after the last comma (the whole
# Location when it has no comma). The regex leaves the space that followed
# the comma, hence the trim.
wh_list$keyword2 <- gsub(".*,", "", wh_list$Location)
wh_list$keyword2 <- str_to_title(trimws(wh_list$keyword2))

# NOTE(review): the parsing below presumably expects Date as "Month, Year";
# confirm against the CSV.
wh_list$month <- trimws(gsub(", .*", "", wh_list$Date))
wh_list$year <- trimws(gsub(".*,", "", wh_list$Date))

# Query window: first day of the incident's month plus the following 30 days.
wh_list$start <- mdy(paste0(wh_list$month, " 1, ", wh_list$year))
wh_list$end <- wh_list$start + 30

# Sequential incident ID; seq_len() stays valid for a zero-row input.
wh_list$ID <- seq_len(nrow(wh_list))

# Fetch the available networks with the widget turned off; a sample of the
# returned tibble is reproduced in the comment that follows.
stations <- list_networks(widget = FALSE)
## # A tibble: 53 × 3
##                       keyword                             network
##                         <chr>                               <chr>
## 1                    NATIONAL               All National Networks
## 2                     ALJAZAM                   Aljazeera America
## 3                   BLOOMBERG                           Bloomberg
## 4                        CNBC                                CNBC
## 5                         CNN                                 CNN
## 6                         FBC                        FOX Business
## 7                    FOXNEWSW                            FOX News
## 8                       MSNBC                               MSNBC
## 9               INTERNATIONAL          All International Networks
## 10                 BBCNEWSSEG                            BBC News
## 11                  AFFNETALL              All Affiliate Networks
## 12                 AFFNET_ABC              ABC Affiliate Stations
## 13                 AFFNET_CBS              CBS Affiliate Stations
## 14                 AFFNET_FOX              FOX Affiliate Stations
## 15                AFFNET_MYTV             MYTV Affiliate Stations
## 16                 AFFNET_NBC              NBC Affiliate Stations
## 17                 AFFNET_PBS              PBS Affiliate Stations
## 18                 AFFMARKALL               All Affiliate Markets
## 19           AFFMARKET_Boston           Boston Affiliate Stations
## 20     AFFMARKET_Cedar Rapids     Cedar Rapids Affiliate Stations
## 21        AFFMARKET_Charlotte        Charlotte Affiliate Stations
## 22       AFFMARKET_Cincinnati       Cincinnati Affiliate Stations
## 23        AFFMARKET_Cleveland        Cleveland Affiliate Stations
## 24 AFFMARKET_Colorado Springs Colorado Springs Affiliate Stations
## 25         AFFMARKET_Columbia         Columbia Affiliate Stations
## 26  AFFMARKET_Dakota Dunes SD  Dakota Dunes SD Affiliate Stations
## 27    AFFMARKET_Daytona Beach    Daytona Beach Affiliate Stations
## 28           AFFMARKET_Denver           Denver Affiliate Stations
## 29       AFFMARKET_Des Moines       Des Moines Affiliate Stations
## 30           AFFMARKET_Durham           Durham Affiliate Stations
## 31        AFFMARKET_Goldsboro        Goldsboro Affiliate Stations
## 32       AFFMARKET_Greenville       Greenville Affiliate Stations
## 33          AFFMARKET_Hampton          Hampton Affiliate Stations
## 34        AFFMARKET_Las Vegas        Las Vegas Affiliate Stations
## 35        AFFMARKET_Lynchburg        Lynchburg Affiliate Stations
## 36            AFFMARKET_Miami            Miami Affiliate Stations
## 37       AFFMARKET_Newport KY       Newport KY Affiliate Stations
## 38          AFFMARKET_Norfolk          Norfolk Affiliate Stations
## 39          AFFMARKET_Orlando          Orlando Affiliate Stations
## 40     AFFMARKET_Philadelphia     Philadelphia Affiliate Stations
## 41       AFFMARKET_Portsmouth       Portsmouth Affiliate Stations
## 42           AFFMARKET_Pueblo           Pueblo Affiliate Stations
## 43          AFFMARKET_Raleigh          Raleigh Affiliate Stations
## 44             AFFMARKET_Reno             Reno Affiliate Stations
## 45          AFFMARKET_Roanoke          Roanoke Affiliate Stations
## 46    AFFMARKET_San Francisco    San Francisco Affiliate Stations
## 47   AFFMARKET_Shaker Heights   Shaker Heights Affiliate Stations
## 48       AFFMARKET_Sioux City       Sioux City Affiliate Stations
## 49   AFFMARKET_St. Petersburg   St. Petersburg Affiliate Stations
## 50            AFFMARKET_Tampa            Tampa Affiliate Stations
## 51   AFFMARKET_Virginia Beach   Virginia Beach Affiliate Stations
## 52    AFFMARKET_Washington DC    Washington DC Affiliate Stations
## 53         AFFMARKET_Waterloo         Waterloo Affiliate Stations
## # ... with 1 more variables: date_range <chr>
# Rename the three columns by position, then discard the date-range column so
# only the keyword and the station name remain.
names(stations) <- c("keyword", "station", "dates")
stations[["dates"]] <- NULL

# Each incident ----

# Run one TV-news query for `term` with the context word "attack" over the
# custom window [start, end]. The calling code treats a NULL return as
# "no results".
query_incident <- function(term, start, end) {
  query_tv(term, "attack", timespan = "custom",
           start_date = start, end_date = end)
}

# Aggregate a query timeline into per-station mention counts for one incident.
#
# timeline: data frame with `station` and `value` columns.
# stations: lookup table with `keyword` and `station` columns.
# id:       incident ID stamped on every returned row.
#
# Returns the rows of `stations` that have a count, plus `count`, `ID` and
# `total` (the sum of `count` over the returned rows).
summarise_incident <- function(timeline, stations, id) {
  station_counts <- timeline %>%
    group_by(station) %>%
    summarize(count = sum(value))

  stat_m <- left_join(stations, station_counts, by = "station")
  stat_m <- subset(stat_m, !is.na(count))
  stat_m$ID <- id
  stat_m$total <- sum(stat_m$count)
  stat_m
}

# Draw (and print) the lollipop chart of each station's share of mentions.
#
# timeline:    data frame with `station` and `value` columns.
# search_term: the keyword that was actually searched, shown in the labels.
# n_mentions:  total number of mentions, shown as "n=" in the subtitle.
#
# Returns the ggplot object invisibly.
plot_station_share <- function(timeline, search_term, n_mentions) {
  # Stations sorted by descending count; the factor levels are reversed so
  # the largest share is drawn at the top of the y axis.
  timeline_df <- count(timeline, station, wt = value, sort = TRUE) %>%
    mutate(
      pct = n / sum(n),
      pct_lab = sprintf("%s (%s)", scales::comma(n), scales::percent(pct)),
      station = factor(station, levels = rev(station))
    )

  gg <- ggplot(timeline_df, aes(y = station, x = pct)) +
    geom_lollipop(point.colour = "steelblue", point.size = 3,
                  horizontal = TRUE) +
    geom_text(aes(label = pct_lab), hjust = -.3) +
    scale_x_continuous(labels = percent,
                       breaks = seq(0, 1, by = 0.2), limits = c(0, 1)) +
    labs(x = NULL, y = NULL,
         title = paste0("Percent of all sentences on TV channels mentioning ",
                        search_term),
         subtitle = paste0("Search parameters: Primary [", search_term,
                           "]; Context: [attack]; n=", n_mentions),
         caption = "Source: Internet Archive Television News Archive via #rstats newsflash package") +
    theme_minimal(base_family = "Arial Narrow") +
    theme(panel.grid.major.y = element_blank()) +
    theme(panel.grid.minor = element_blank()) +
    theme(axis.line.y = element_line(color = "#2b2b2b", size = 0.15)) +
    theme(axis.text.y = element_text(margin = margin(r = -5, l = 0))) +
    theme(plot.margin = unit(rep(30, 4), "pt")) +
    theme(plot.title = element_text(face = "bold")) +
    theme(plot.subtitle = element_text(margin = margin(b = 10))) +
    theme(plot.caption = element_text(size = 8, margin = margin(t = 10)))

  print(gg)
  invisible(gg)
}

# Collect one result per incident in a preallocated list and bind once at the
# end. This avoids growing a data frame inside the loop and no longer depends
# on the first incident returning results to initialise `stations_m`.
incident_rows <- vector("list", nrow(wh_list))

for (i in seq_len(nrow(wh_list))) {
  # Try the primary keyword first; fall back to the secondary keyword only
  # when the primary query returns nothing, so no request is wasted.
  search_term <- wh_list$keyword[i]
  result <- query_incident(search_term, wh_list$start[i], wh_list$end[i])

  if (is.null(result)) {
    search_term <- wh_list$keyword2[i]
    result <- query_incident(search_term, wh_list$start[i], wh_list$end[i])
  }

  if (is.null(result)) {
    # Neither keyword returned anything: keep a zero-count placeholder row so
    # the incident still appears in the combined table.
    incident_rows[[i]] <- data.frame(keyword = "", station = "", count = 0,
                                     ID = i, total = 0,
                                     stringsAsFactors = FALSE)
    next
  }

  queries_timeline <- map_df(list(result), "timeline")
  stat_m <- summarise_incident(queries_timeline, stations, i)
  incident_rows[[i]] <- stat_m

  # `total` already repeats the incident sum on every row, so the subtitle's
  # n is the sum of the counts, not the sum of `total`.
  plot_station_share(queries_timeline, search_term, sum(stat_m$count))
}

stations_m <- bind_rows(incident_rows)

# Total incidents ----

# Attach each incident's Location and Date to its per-station rows and build
# a display label of the form "Location (Date)".
wh_list_narrow <- stations_m %>%
  left_join(select(wh_list, Location, Date, ID), by = "ID") %>%
  mutate(loc_id = paste0(Location, " (", Date, ")"))

# One row per label (the first one encountered), without the per-station
# columns; the result stays grouped by loc_id.
wh_list_short <- wh_list_narrow %>%
  group_by(loc_id) %>%
  filter(row_number() == 1) %>%
  select(-keyword, -station, -count)


# Export the long per-station table.
write.csv(wh_list_narrow, file = "wh_list_narrow.csv")

# Horizontal stacked bars: one bar per incident, filled by station, with the
# incident total printed next to each bar.
# NOTE(review): the value axis is fixed to 0-7000; confirm no incident total
# exceeds that limit.
gg <- ggplot(wh_list_narrow, aes(x = reorder(loc_id, -ID), y = count)) +
  geom_bar(aes(fill = station), stat = "identity") +
  geom_text(
    data = wh_list_short,
    aes(y = total, label = total),
    size = 3,
    hjust = -.3
  ) +
  coord_flip() +
  scale_y_continuous(labels = comma, limits = c(0, 7000)) +
  labs(
    x = NULL,
    y = NULL,
    title = "Mentions of international attacks on TV news",
    subtitle = "Search parameters: Primary [Location]; Context: [attack]",
    caption = "Source: Internet Archive Television News Archive via #rstats newsflash package"
  ) +
  theme_minimal(base_family = "Arial Narrow") +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line.y = element_line(color = "#2b2b2b", size = 0.15),
    axis.text.y = element_text(margin = margin(r = -5, l = 0)),
    plot.margin = unit(rep(30, 4), "pt"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(margin = margin(b = 10)),
    plot.caption = element_text(size = 8, margin = margin(t = 10))
  )
gg

# Plain data.frame copy of the long table (not referenced again in the code
# visible here).
wh_list_narrow2 <- as.data.frame(wh_list_narrow)

# Same horizontal bar chart restricted to the "FOX News" rows, with each
# bar's count printed next to it. The text layer inherits the plot's data
# and x/y mapping.
gg <- ggplot(
  subset(wh_list_narrow, station == "FOX News"),
  aes(x = reorder(loc_id, -ID), y = count)
) +
  geom_bar(aes(fill = station), stat = "identity") +
  geom_text(aes(y = count, label = count), size = 3, hjust = -.3) +
  coord_flip() +
  scale_y_continuous(labels = comma, limits = c(0, 7000)) +
  labs(
    x = NULL,
    y = NULL,
    title = "Mentions of international attacks on FOX News",
    subtitle = "Search parameters: Primary [Location]; Context: [attack]",
    caption = "Source: Internet Archive Television News Archive via #rstats newsflash package"
  ) +
  theme_minimal(base_family = "Arial Narrow") +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line.y = element_line(color = "#2b2b2b", size = 0.15),
    axis.text.y = element_text(margin = margin(r = -5, l = 0)),
    plot.margin = unit(rep(30, 4), "pt"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(margin = margin(b = 10)),
    plot.caption = element_text(size = 8, margin = margin(t = 10))
  )
gg